import json
import scrapy
from scrapy import cmdline
from scrapy.http import HtmlResponse


class JcInfoSpider(scrapy.Spider):
    """Spider for the cninfo.com.cn latest-disclosure API.

    Posts paginated form requests to the disclosure endpoint and yields the
    decoded JSON payload of each page as an item, so downstream item
    pipelines (e.g. a MongoDB pipeline, per the note at the bottom of this
    file) can persist the data.
    """

    name = "jc_info"

    # allowed_domains = ["www.cninfo.com.cn"]
    # start_urls = ["http://www.cninfo.com.cn/"]

    def start_requests(self):
        """Generate one POST request per result page (pages 1..18)."""
        url = 'http://www.cninfo.com.cn/new/disclosure'
        for page in range(1, 19):
            form_data = {
                "column": "szse_latest",
                "pageNum": str(page),
                "pageSize": "30",
                "sortName": "",
                "sortType": "",
                "clusterFlag": "true"
            }

            # Send the request with the form payload attached; the default
            # callback (self.parse) handles the response.
            yield scrapy.FormRequest(url=url, formdata=form_data,
                                     callback=self.parse)

    def parse(self, response: HtmlResponse, **kwargs):
        """Decode the JSON response and yield it for item pipelines.

        The payload schema is whatever the cninfo API returns for this
        page — presumably a dict of announcement records; confirm against
        a live response before writing the MongoDB pipeline.
        """
        data = response.json()
        # Yield instead of print: Scrapy only persists what callbacks yield.
        yield data


if __name__ == '__main__':
    # Launch the crawl in-process, equivalent to running
    # `scrapy crawl jc_info` from the command line.
    cmdline.execute(['scrapy', 'crawl', 'jc_info'])


"""
完善当前案例,将解析出来的数据保存在mongodb中
"""